import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
import xgboost as xgb
import optuna
import warnings
warnings.filterwarnings('ignore')
/Users/admin/opt/anaconda3/envs/python38/lib/python3.8/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead. from pandas import MultiIndex, Int64Index
# Load the preprocessed train/validation splits (already one-hot encoded,
# with date-derived features).
df_train = pd.read_csv('../data/Preprocessed_data_with_date/airplane_train_processed_date.csv')
df_val = pd.read_csv('../data/Preprocessed_data_with_date/airplane_test_processed_date.csv')

# Feature columns fed to the model (one-hot categoricals + numeric ratings).
features = ['Gender_Female', 'Customer Type_Loyal Customer',
            'Type of Travel_Business travel',
            'Type of Travel_Personal Travel', 'Class_Business',
            'Class_Eco', 'Age',
            'Flight Distance', 'Departure Delay in Minutes',
            'Arrival Delay in Minutes', 'Inflight wifi service',
            'Departure/Arrival time convenient',
            'Ease of Online booking', 'Gate location',
            'Food and drink', 'Online boarding',
            'Seat comfort', 'Inflight entertainment',
            'On-board service', 'Leg room service',
            'Baggage handling', 'Checkin service',
            'Inflight service', 'Cleanliness']

le = LabelEncoder()
X_train = df_train[features]
# Fit the label encoding on the training targets only.
y_train = le.fit_transform(df_train['satisfaction'])
X_val = df_val[features]
# BUG FIX: reuse the training-set encoding with transform() instead of
# re-fitting on the validation labels — fit_transform here could silently
# assign different integer codes if the label set or its sort order differed.
y_val = le.transform(df_val['satisfaction'])
X_train.shape, X_val.shape, y_train.shape, y_val.shape
((102825, 24), (25976, 24), (102825,), (25976,))
def objective(trial, data=X_train, target=y_train):
    """Optuna objective: train an XGBoost classifier on a 70/30 split of
    *data*/*target* with trial-suggested hyper-parameters and return the
    hold-out accuracy (maximised by the study).

    Parameters
    ----------
    trial : optuna.Trial
        Trial object used to sample hyper-parameters.
    data, target : array-like
        Features and encoded labels; default to the module-level training set.

    Returns
    -------
    float
        Accuracy on the internal 30% hold-out split.
    """
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.3, random_state=15)
    param = {
        # 'tree_method': 'gpu_hist',  # enable to train on GPU and speed things up
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
        # and samples from the identical log-uniform distribution.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.008, 0.012, 0.016, 0.02]),
        'n_estimators': 1000,  # fixed (as in the original model); early stopping trims it
        'max_depth': trial.suggest_categorical('max_depth', [5, 10, 15, 20, 25, 30, 37, 40]),
        'random_state': 15,
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        # Set the metric explicitly: silences the XGBoost >=1.3 FutureWarning
        # about the default metric changing from 'error' to 'logloss'.
        'eval_metric': 'logloss',
    }
    model = xgb.XGBClassifier(**param)
    # Early stopping on the hold-out split keeps the effective tree count
    # well below the fixed n_estimators ceiling.
    model.fit(train_x, train_y, eval_set=[(test_x, test_y)],
              early_stopping_rounds=100, verbose=False)
    preds = model.predict(test_x)
    return accuracy_score(test_y, preds)
# Search for the hyper-parameters that maximise hold-out accuracy,
# running 30 trials of the objective above.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)
[I 2023-04-25 14:58:10,559] A new study created in memory with name: no-name-24e61654-b9ab-448d-aa64-c22453ec1c69
[14:58:10] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 14:59:03,056] Trial 0 finished with value: 0.9271913900414938 and parameters: {'lambda': 0.09686253470896354, 'alpha': 0.30928604597111486, 'colsample_bytree': 0.5, 'subsample': 0.4, 'learning_rate': 0.016, 'max_depth': 15, 'min_child_weight': 294}. Best is trial 0 with value: 0.9271913900414938.
[14:59:03] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:00:46,606] Trial 1 finished with value: 0.9555886929460581 and parameters: {'lambda': 0.001746736475085981, 'alpha': 7.797221849627294, 'colsample_bytree': 0.9, 'subsample': 0.6, 'learning_rate': 0.02, 'max_depth': 30, 'min_child_weight': 45}. Best is trial 1 with value: 0.9555886929460581.
[15:00:46] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:01:52,592] Trial 2 finished with value: 0.9449234958506224 and parameters: {'lambda': 5.741405136608666, 'alpha': 0.025981974372041777, 'colsample_bytree': 0.7, 'subsample': 0.5, 'learning_rate': 0.016, 'max_depth': 20, 'min_child_weight': 115}. Best is trial 1 with value: 0.9555886929460581.
[15:01:52] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:03:16,198] Trial 3 finished with value: 0.9492025414937759 and parameters: {'lambda': 1.4393869359566602, 'alpha': 2.7094074905751686, 'colsample_bytree': 0.7, 'subsample': 1.0, 'learning_rate': 0.016, 'max_depth': 37, 'min_child_weight': 167}. Best is trial 1 with value: 0.9555886929460581.
[15:03:16] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:05:07,621] Trial 4 finished with value: 0.9465119294605809 and parameters: {'lambda': 2.2830629284888793, 'alpha': 0.03165941469169312, 'colsample_bytree': 0.7, 'subsample': 0.8, 'learning_rate': 0.02, 'max_depth': 5, 'min_child_weight': 160}. Best is trial 1 with value: 0.9555886929460581.
[15:05:08] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:06:30,855] Trial 5 finished with value: 0.9459284232365145 and parameters: {'lambda': 0.03218607375001824, 'alpha': 0.2864787340762842, 'colsample_bytree': 0.9, 'subsample': 1.0, 'learning_rate': 0.02, 'max_depth': 20, 'min_child_weight': 282}. Best is trial 1 with value: 0.9555886929460581.
[15:06:31] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:07:33,509] Trial 6 finished with value: 0.931891856846473 and parameters: {'lambda': 0.009065998759967924, 'alpha': 0.017701028006641615, 'colsample_bytree': 0.7, 'subsample': 0.6, 'learning_rate': 0.008, 'max_depth': 30, 'min_child_weight': 223}. Best is trial 1 with value: 0.9555886929460581.
[15:07:33] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:08:21,340] Trial 7 finished with value: 0.9249546161825726 and parameters: {'lambda': 0.4258219785915395, 'alpha': 0.02432123144788469, 'colsample_bytree': 0.7, 'subsample': 0.4, 'learning_rate': 0.008, 'max_depth': 40, 'min_child_weight': 262}. Best is trial 1 with value: 0.9555886929460581.
[15:08:21] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:09:20,962] Trial 8 finished with value: 0.9321836099585062 and parameters: {'lambda': 0.004201771581233701, 'alpha': 0.005898749994938761, 'colsample_bytree': 0.5, 'subsample': 0.7, 'learning_rate': 0.008, 'max_depth': 37, 'min_child_weight': 237}. Best is trial 1 with value: 0.9555886929460581.
[15:09:21] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:10:30,400] Trial 9 finished with value: 0.9456690871369294 and parameters: {'lambda': 0.009275309255037608, 'alpha': 0.23538864544701601, 'colsample_bytree': 0.7, 'subsample': 1.0, 'learning_rate': 0.02, 'max_depth': 20, 'min_child_weight': 265}. Best is trial 1 with value: 0.9555886929460581.
[15:10:30] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:12:50,094] Trial 10 finished with value: 0.959478734439834 and parameters: {'lambda': 0.0010237219989266593, 'alpha': 8.422428785995342, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 4}. Best is trial 10 with value: 0.959478734439834.
[15:12:50] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:16:13,995] Trial 11 finished with value: 0.9590248962655602 and parameters: {'lambda': 0.0011175756799705267, 'alpha': 6.642762883305675, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 9}. Best is trial 10 with value: 0.959478734439834.
[15:16:14] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:18:28,775] Trial 12 finished with value: 0.9591545643153527 and parameters: {'lambda': 0.001105547847223136, 'alpha': 6.886012938403033, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 9}. Best is trial 10 with value: 0.959478734439834.
[15:18:29] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:20:11,329] Trial 13 finished with value: 0.950434387966805 and parameters: {'lambda': 0.0013995311463188343, 'alpha': 1.845907714592146, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 71}. Best is trial 10 with value: 0.959478734439834.
[15:20:11] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:22:48,418] Trial 14 finished with value: 0.9595759854771784 and parameters: {'lambda': 0.00469961180646835, 'alpha': 1.3507968101855052, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 7}. Best is trial 14 with value: 0.9595759854771784.
[15:22:48] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:24:01,763] Trial 15 finished with value: 0.9481651970954357 and parameters: {'lambda': 0.0053396541384116555, 'alpha': 1.1551470371682457, 'colsample_bytree': 0.8, 'subsample': 0.5, 'learning_rate': 0.012, 'max_depth': 10, 'min_child_weight': 76}. Best is trial 14 with value: 0.9595759854771784.
[15:24:02] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:25:31,901] Trial 16 finished with value: 0.9548106846473029 and parameters: {'lambda': 0.024946259536618134, 'alpha': 0.9696775467119231, 'colsample_bytree': 0.6, 'subsample': 0.7, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 48}. Best is trial 14 with value: 0.9595759854771784.
[15:25:32] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:27:14,835] Trial 17 finished with value: 0.9497860477178424 and parameters: {'lambda': 0.003686319356440446, 'alpha': 2.55757697687791, 'colsample_bytree': 1.0, 'subsample': 0.8, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 108}. Best is trial 14 with value: 0.9595759854771784.
[15:27:15] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:28:45,175] Trial 18 finished with value: 0.9561721991701245 and parameters: {'lambda': 0.016524847269376267, 'alpha': 0.7917391247683064, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 15, 'min_child_weight': 30}. Best is trial 14 with value: 0.9595759854771784.
[15:28:45] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:30:05,823] Trial 19 finished with value: 0.9462525933609959 and parameters: {'lambda': 0.0031440437547235454, 'alpha': 9.033069546807079, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 10, 'min_child_weight': 102}. Best is trial 14 with value: 0.9595759854771784.
[15:30:06] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:30:59,098] Trial 20 finished with value: 0.9564315352697096 and parameters: {'lambda': 0.0714917977514504, 'alpha': 0.0015837108209135506, 'colsample_bytree': 0.8, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 5, 'min_child_weight': 1}. Best is trial 14 with value: 0.9595759854771784.
[15:30:59] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:33:10,979] Trial 21 finished with value: 0.9573392116182573 and parameters: {'lambda': 0.0011894865658659552, 'alpha': 4.071127167059647, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 24}. Best is trial 14 with value: 0.9595759854771784.
[15:33:11] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:34:44,509] Trial 22 finished with value: 0.9524766597510373 and parameters: {'lambda': 0.001973038704644662, 'alpha': 4.346837280922635, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 58}. Best is trial 14 with value: 0.9595759854771784.
[15:34:46] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:38:14,675] Trial 23 finished with value: 0.9596408195020747 and parameters: {'lambda': 0.0010036432605233932, 'alpha': 9.044483811054086, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 3}. Best is trial 23 with value: 0.9596408195020747.
[15:38:15] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:39:40,233] Trial 24 finished with value: 0.9475492738589212 and parameters: {'lambda': 0.0030041602672202044, 'alpha': 9.980316569157457, 'colsample_bytree': 0.6, 'subsample': 0.6, 'learning_rate': 0.012, 'max_depth': 40, 'min_child_weight': 83}. Best is trial 23 with value: 0.9596408195020747.
[15:39:40] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:41:51,541] Trial 25 finished with value: 0.9446317427385892 and parameters: {'lambda': 0.007234079630288269, 'alpha': 3.4755188880480103, 'colsample_bytree': 0.6, 'subsample': 0.7, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 131}. Best is trial 23 with value: 0.9596408195020747.
[15:41:51] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:42:44,302] Trial 26 finished with value: 0.9321187759336099 and parameters: {'lambda': 0.0027719453994463164, 'alpha': 1.5918337346232623, 'colsample_bytree': 0.6, 'subsample': 0.4, 'learning_rate': 0.012, 'max_depth': 25, 'min_child_weight': 189}. Best is trial 23 with value: 0.9596408195020747.
[15:42:44] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:45:00,626] Trial 27 finished with value: 0.9566908713692946 and parameters: {'lambda': 0.0022371100579773756, 'alpha': 0.5303455778803517, 'colsample_bytree': 0.6, 'subsample': 0.8, 'learning_rate': 0.008, 'max_depth': 25, 'min_child_weight': 30}. Best is trial 23 with value: 0.9596408195020747.
[15:45:00] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:46:30,377] Trial 28 finished with value: 0.95607494813278 and parameters: {'lambda': 0.005725535310789027, 'alpha': 1.8222384785507555, 'colsample_bytree': 0.5, 'subsample': 0.5, 'learning_rate': 0.016, 'max_depth': 25, 'min_child_weight': 30}. Best is trial 23 with value: 0.9596408195020747.
[15:46:30] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[I 2023-04-25 15:49:36,521] Trial 29 finished with value: 0.9597380705394191 and parameters: {'lambda': 0.001001847007957527, 'alpha': 4.069392525142848, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.016, 'max_depth': 15, 'min_child_weight': 1}. Best is trial 29 with value: 0.9597380705394191.
# Tabulate every trial (value, timing, sampled parameters, state) for inspection.
study.trials_dataframe()
| number | value | datetime_start | datetime_complete | duration | params_alpha | params_colsample_bytree | params_lambda | params_learning_rate | params_max_depth | params_min_child_weight | params_subsample | state | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0.927191 | 2023-04-25 14:58:10.566829 | 2023-04-25 14:59:03.049852 | 0 days 00:00:52.483023 | 0.309286 | 0.5 | 0.096863 | 0.016 | 15 | 294 | 0.4 | COMPLETE |
| 1 | 1 | 0.955589 | 2023-04-25 14:59:03.070811 | 2023-04-25 15:00:46.606033 | 0 days 00:01:43.535222 | 7.797222 | 0.9 | 0.001747 | 0.020 | 30 | 45 | 0.6 | COMPLETE |
| 2 | 2 | 0.944923 | 2023-04-25 15:00:46.607821 | 2023-04-25 15:01:52.591897 | 0 days 00:01:05.984076 | 0.025982 | 0.7 | 5.741405 | 0.016 | 20 | 115 | 0.5 | COMPLETE |
| 3 | 3 | 0.949203 | 2023-04-25 15:01:52.593881 | 2023-04-25 15:03:16.197374 | 0 days 00:01:23.603493 | 2.709407 | 0.7 | 1.439387 | 0.016 | 37 | 167 | 1.0 | COMPLETE |
| 4 | 4 | 0.946512 | 2023-04-25 15:03:16.199214 | 2023-04-25 15:05:07.619473 | 0 days 00:01:51.420259 | 0.031659 | 0.7 | 2.283063 | 0.020 | 5 | 160 | 0.8 | COMPLETE |
| 5 | 5 | 0.945928 | 2023-04-25 15:05:07.632508 | 2023-04-25 15:06:30.855322 | 0 days 00:01:23.222814 | 0.286479 | 0.9 | 0.032186 | 0.020 | 20 | 282 | 1.0 | COMPLETE |
| 6 | 6 | 0.931892 | 2023-04-25 15:06:30.857042 | 2023-04-25 15:07:33.509319 | 0 days 00:01:02.652277 | 0.017701 | 0.7 | 0.009066 | 0.008 | 30 | 223 | 0.6 | COMPLETE |
| 7 | 7 | 0.924955 | 2023-04-25 15:07:33.510947 | 2023-04-25 15:08:21.339661 | 0 days 00:00:47.828714 | 0.024321 | 0.7 | 0.425822 | 0.008 | 40 | 262 | 0.4 | COMPLETE |
| 8 | 8 | 0.932184 | 2023-04-25 15:08:21.341632 | 2023-04-25 15:09:20.961567 | 0 days 00:00:59.619935 | 0.005899 | 0.5 | 0.004202 | 0.008 | 37 | 237 | 0.7 | COMPLETE |
| 9 | 9 | 0.945669 | 2023-04-25 15:09:20.963731 | 2023-04-25 15:10:30.399487 | 0 days 00:01:09.435756 | 0.235389 | 0.7 | 0.009275 | 0.020 | 20 | 265 | 1.0 | COMPLETE |
| 10 | 10 | 0.959479 | 2023-04-25 15:10:30.401036 | 2023-04-25 15:12:50.092584 | 0 days 00:02:19.691548 | 8.422429 | 0.6 | 0.001024 | 0.012 | 25 | 4 | 0.6 | COMPLETE |
| 11 | 11 | 0.959025 | 2023-04-25 15:12:50.095309 | 2023-04-25 15:16:13.988827 | 0 days 00:03:23.893518 | 6.642763 | 0.6 | 0.001118 | 0.012 | 25 | 9 | 0.6 | COMPLETE |
| 12 | 12 | 0.959155 | 2023-04-25 15:16:14.034563 | 2023-04-25 15:18:28.775334 | 0 days 00:02:14.740771 | 6.886013 | 0.6 | 0.001106 | 0.012 | 25 | 9 | 0.6 | COMPLETE |
| 13 | 13 | 0.950434 | 2023-04-25 15:18:28.777167 | 2023-04-25 15:20:11.328674 | 0 days 00:01:42.551507 | 1.845908 | 0.6 | 0.001400 | 0.012 | 25 | 71 | 0.6 | COMPLETE |
| 14 | 14 | 0.959576 | 2023-04-25 15:20:11.330472 | 2023-04-25 15:22:48.416722 | 0 days 00:02:37.086250 | 1.350797 | 0.6 | 0.004700 | 0.012 | 25 | 7 | 0.6 | COMPLETE |
| 15 | 15 | 0.948165 | 2023-04-25 15:22:48.420080 | 2023-04-25 15:24:01.763001 | 0 days 00:01:13.342921 | 1.155147 | 0.8 | 0.005340 | 0.012 | 10 | 76 | 0.5 | COMPLETE |
| 16 | 16 | 0.954811 | 2023-04-25 15:24:01.764747 | 2023-04-25 15:25:31.901254 | 0 days 00:01:30.136507 | 0.969678 | 0.6 | 0.024946 | 0.012 | 25 | 48 | 0.7 | COMPLETE |
| 17 | 17 | 0.949786 | 2023-04-25 15:25:31.902862 | 2023-04-25 15:27:14.835313 | 0 days 00:01:42.932451 | 2.557577 | 1.0 | 0.003686 | 0.012 | 25 | 108 | 0.8 | COMPLETE |
| 18 | 18 | 0.956172 | 2023-04-25 15:27:14.838446 | 2023-04-25 15:28:45.174707 | 0 days 00:01:30.336261 | 0.791739 | 0.6 | 0.016525 | 0.012 | 15 | 30 | 0.6 | COMPLETE |
| 19 | 19 | 0.946253 | 2023-04-25 15:28:45.178065 | 2023-04-25 15:30:05.822561 | 0 days 00:01:20.644496 | 9.033070 | 1.0 | 0.003144 | 0.012 | 10 | 102 | 0.6 | COMPLETE |
| 20 | 20 | 0.956432 | 2023-04-25 15:30:05.824652 | 2023-04-25 15:30:59.097459 | 0 days 00:00:53.272807 | 0.001584 | 0.8 | 0.071492 | 0.012 | 5 | 1 | 0.6 | COMPLETE |
| 21 | 21 | 0.957339 | 2023-04-25 15:30:59.099120 | 2023-04-25 15:33:10.979082 | 0 days 00:02:11.879962 | 4.071127 | 0.6 | 0.001189 | 0.012 | 25 | 24 | 0.6 | COMPLETE |
| 22 | 22 | 0.952477 | 2023-04-25 15:33:10.981921 | 2023-04-25 15:34:44.509192 | 0 days 00:01:33.527271 | 4.346837 | 0.6 | 0.001973 | 0.012 | 25 | 58 | 0.6 | COMPLETE |
| 23 | 23 | 0.959641 | 2023-04-25 15:34:44.511250 | 2023-04-25 15:38:14.671175 | 0 days 00:03:30.159925 | 9.044484 | 0.6 | 0.001004 | 0.012 | 25 | 3 | 0.6 | COMPLETE |
| 24 | 24 | 0.947549 | 2023-04-25 15:38:14.696025 | 2023-04-25 15:39:40.231197 | 0 days 00:01:25.535172 | 9.980317 | 0.6 | 0.003004 | 0.012 | 40 | 83 | 0.6 | COMPLETE |
| 25 | 25 | 0.944632 | 2023-04-25 15:39:40.241822 | 2023-04-25 15:41:51.539112 | 0 days 00:02:11.297290 | 3.475519 | 0.6 | 0.007234 | 0.012 | 25 | 131 | 0.7 | COMPLETE |
| 26 | 26 | 0.932119 | 2023-04-25 15:41:51.552917 | 2023-04-25 15:42:44.302056 | 0 days 00:00:52.749139 | 1.591834 | 0.6 | 0.002772 | 0.012 | 25 | 189 | 0.4 | COMPLETE |
| 27 | 27 | 0.956691 | 2023-04-25 15:42:44.303658 | 2023-04-25 15:45:00.625521 | 0 days 00:02:16.321863 | 0.530346 | 0.6 | 0.002237 | 0.008 | 25 | 30 | 0.8 | COMPLETE |
| 28 | 28 | 0.956075 | 2023-04-25 15:45:00.631174 | 2023-04-25 15:46:30.376552 | 0 days 00:01:29.745378 | 1.822238 | 0.5 | 0.005726 | 0.016 | 25 | 30 | 0.5 | COMPLETE |
| 29 | 29 | 0.959738 | 2023-04-25 15:46:30.378325 | 2023-04-25 15:49:36.519617 | 0 days 00:03:06.141292 | 4.069393 | 1.0 | 0.001002 | 0.016 | 15 | 1 | 0.6 | COMPLETE |
# Summarise the search: trial count, best parameter set, and best score.
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
# BUG FIX: the objective returns accuracy (maximised), not MSE — label it correctly.
print('Best accuracy:', study.best_value)
Number of finished trials: 30
Best trial: {'lambda': 0.001001847007957527, 'alpha': 4.069392525142848, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.016, 'max_depth': 15, 'min_child_weight': 1}
Best MSE: 0.9597380705394191
# Retrain on the full training set with the winning configuration.
# BUG FIX: study.best_trial.params holds only the trial-*suggested* values;
# the fixed settings used during the search (n_estimators=1000, random_state=15)
# must be merged back in, otherwise the final model silently falls back to
# XGBoost defaults (e.g. n_estimators=100) and does not match what was tuned.
best_params = dict(study.best_trial.params, n_estimators=1000, random_state=15)
final_model = xgb.XGBClassifier(**best_params)
final_model.fit(X_train, y_train)
y_val_pred = final_model.predict(X_val)
print("Accuracy on validation set is {}%".format(accuracy_score(y_val, y_val_pred) * 100))
[15:49:37] WARNING: /var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_44tbtwf8c1/croots/recipe/xgboost-split_1659548960882/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior. Accuracy on validation set is 95.11087157376039%
# Interactive Optuna diagnostics (rendered inline in a notebook environment).
optuna.visualization.plot_parallel_coordinate(study)  # parameter/value interactions across trials
optuna.visualization.plot_param_importances(study)  # which hyper-parameters drove accuracy most
optuna.visualization.plot_edf(study)  # empirical distribution of trial objective values